In [ ]:
import json
import requests
import base64
import io

from openai import AzureOpenAI
from PIL import Image, ImageDraw, ImageFont
from IPython.display import display, Image as IPImage

# Configuration
GPT4V_KEY = "xxx"
ENDPOINT_URL = "xxx"

1. 이미지 오브젝트 분석¶

In [ ]:
file_name = 'watermelon_org.png'
IMAGE_PATH = f"./data/{file_name}"
display(IPImage(filename=IMAGE_PATH))
In [ ]:
# GPT-4o에 이미지를 제공하고 설명을 받는 함수
SYSTEM_PROMPT_PIC_2_TEXT = """
You are a photo analysis expert. The objects in the photos given to you are product pictures taken by retail business owners to sell. 
"""

def describe_image(prompt_text, image_path, api_key, endpoint_url):
    # 이미지 파일을 base64로 인코딩
    encoded_image = base64.b64encode(open(image_path, 'rb').read()).decode('ascii')
    
    headers = {
        "Content-Type": "application/json",
        "api-key": api_key,
    }
    deployment_name = "gpt-4o"

    # Payload for the request
    payload = {
        "messages": [
            {
                "role": "system",
                "content": [
                    {
                        "type": "text",
                        "text": SYSTEM_PROMPT_PIC_2_TEXT
                    }
                ], 
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/png;base64,{encoded_image}"
                        }
                    },
                    {
                        "type": "text",
                        "text": prompt_text
                    }
                ]
            }
        ],
        "temperature": 0.7,
        "top_p": 0.95,
        "max_tokens": 1000
    }

    # Send request
    try:
        response = requests.post(endpoint_url, headers=headers, json=payload)
        response.raise_for_status()  # Will raise an HTTPError if the HTTP request returned an unsuccessful status code
    except requests.RequestException as e:
        raise SystemExit(f"Failed to make the request. Error: {e}")

    # Handle the response
    return response.json()['choices'][0]['message']['content']
In [ ]:
prompt_pic_2_object_description = "Describe the main object in the given photo."
description = describe_image(prompt_text=prompt_pic_2_object_description, image_path=IMAGE_PATH, api_key=GPT4V_KEY, endpoint_url=ENDPOINT_URL)
print(description)
The main object in the given photo is a watermelon. It has a dark green rind with lighter green stripes and is placed inside a clear, protective plastic packaging. The watermelon is round with a small stem attached at the top.

2. Dall-e 3 배경 이미지 생성¶

In [ ]:
prompt_text_2_back_pic = f"""
You are a background image generation expert. When given a description of an object, you have the ability to generate creative and harmonious background images that can be used in advertising posters. Generate a background image that complements the object by referring to the description of the object below. However, DO NOT generate an image of the described object itself.

Description of the object: {text_pic_describe}
"""
prompt_text_2_back_pic
Out[ ]:
'\nYou are a background image generation expert. When given a description of an object, you have the ability to generate creative and harmonious background images that can be used in advertising posters. Generate a background image that complements the object by referring to the description of the object below. However, DO NOT generate an image of the described object itself.\n\nDescription of the object: The main object in the photo is a watermelon. It has a dark green rind with lighter green stripes and appears to be placed inside a protective, clear plastic inflatable cushion. The watermelon is round with a short, curved stem on top.\n'
In [ ]:
client = AzureOpenAI(
    api_version="2024-02-01",
    azure_endpoint="https://kt-hackathon-ai-04.openai.azure.com/",
    api_key=GPT4V_KEY,
)

result = client.images.generate(
    model="dall-e-3", # the name of your DALL-E 3 deployment
    prompt=prompt_text_2_back_pic,
    n=1
)

image_url = json.loads(result.model_dump_json())['data'][0]['url']
print(image_url)
https://dalleproduse.blob.core.windows.net/private/images/27a99101-4fcf-41f1-aa0c-cb5e4eadb64d/generated_00.png?se=2024-07-16T14%3A27%3A40Z&sig=K3TIBhHZAuYHwRFCT0X%2BV%2BBAFsskzp9fwi82rW%2FMXt4%3D&ske=2024-07-19T09%3A36%3A53Z&skoid=09ba021e-c417-441c-b203-c81e5dcd7b7f&sks=b&skt=2024-07-12T09%3A36%3A53Z&sktid=33e01921-4d64-4f8c-a055-5bdaffd5e33d&skv=2020-10-02&sp=r&spr=https&sr=b&sv=2020-10-02
In [ ]:
# 이미지 다운로드
response = requests.get(image_url)
image_data = response.content
image = Image.open(io.BytesIO(image_data))
# 다운로드한 이미지를 파일로 저장
background_image_path = "./data/background_image.jpg"
image.save(background_image_path)

display(IPImage(filename=background_image_path))

3. 이미지 누끼 따기¶

In [ ]:
from rembg import remove
In [ ]:
input_file = IMAGE_PATH
object_image_file = f"./data/object_{file_name}"

img = Image.open(input_file)
out = remove(img)
out.save(object_image_file)
display(IPImage(filename=object_image_file))

4. 배경과 합치기¶

In [ ]:
# 배경 이미지와 누끼가 따진 이미지 경로
foreground_image_path = object_image_file

# 이미지 불러오기
background = Image.open(background_image_path).convert("RGBA")
foreground = Image.open(foreground_image_path).convert("RGBA")

# 전경 이미지를 배경 이미지 위에 합치기
# (background.paste를 사용하여 합치는데, 전경 이미지의 알파 채널을 마스크로 사용)
background.paste(foreground, (0, 0), foreground)

# 합성된 이미지 저장 (필요시)
object_overlap_background_image_path = "./data/object_overlap_background.png"
background.save(object_overlap_background_image_path)
display(IPImage(filename=object_overlap_background_image_path))

5. 카피 문구 생성¶

In [ ]:
prompt_pic_2_copy = """
You are an advertising copywriter and brand naming expert. The image provided to you is a poster of a product that a small business owner wants to sell. Write a copy phrase to be added at the bottom of the image in Korean. Keep it within 10 characters.
"""
In [ ]:
copy_phrase = describe_image(prompt_text=prompt_pic_2_copy, image_path=object_overlap_background_image_path, api_key=GPT4V_KEY, endpoint_url=ENDPOINT_URL)
print(copy_phrase)
수박의 행복

6. 카피 문구 입히기¶

In [ ]:
def add_text_to_image(image_path, text, font_path, font_size, output_path):
    # 이미지 불러오기
    image = Image.open(image_path).convert("RGB")  # JPEG를 위해 RGB 모드로 변환
    draw = ImageDraw.Draw(image)
    
    # 폰트 설정
    font = ImageFont.truetype(font_path, font_size)
    
    # 텍스트 크기 계산
    bbox = draw.textbbox((0, 0), text, font=font)
    text_width = bbox[2] - bbox[0]
    text_height = bbox[3] - bbox[1]
    
    # 텍스트 위치 계산 (하단 중앙)
    image_width, image_height = image.size
    x = (image_width - text_width) / 2
    y = image_height - text_height - 10  # 약간의 마진을 둡니다 (10 픽셀)
    
    # 텍스트 추가
    draw.text((x, y), text, font=font, fill="white")
    
    # 이미지 저장
    image.save(output_path)
In [ ]:
# 사용 예시
font_path = "/Users/dtw/Library/Fonts/malgunbd.ttf"
font_size = 40
output_path = "./data/ad_copy_and_img.jpg"

add_text_to_image(image_path=object_overlap_background_image_path, \
                text=copy_phrase, \
                font_path=font_path, \
                font_size=font_size, \
                output_path=output_path)

display(IPImage(filename=output_path))
In [ ]: